
pacman::p_load(tidyverse,
               lubridate,
               tidymodels,
               fs,
               fst,
               tictoc)

# Quarters to score: one date per quarter from TEST_START_QTR to TEST_END_QTR.
# Both bounds are read from the calling environment (not defined in this file).
test_qtrs <- seq(as.Date(TEST_START_QTR), as.Date(TEST_END_QTR), by = "quarter")

# One row per file in the input directory, tagged with
#   qtr         - the first 8-digit run in the path, parsed as yyyymmdd
#   rand_no_cid - the 4 digits that sit directly before the ".fst" extension
file_info <- tibble(file_name = dir_ls("../../data_qtr_rand_no_cid_with_outcome/")) %>%
  mutate(
    qtr = ymd(str_extract(file_name, "[0-9]{8}")),
    # The dot is escaped and the look-ahead anchored at the end of the name so
    # that only a genuine ".fst" extension matches; an unescaped "." would
    # accept any character in front of "fst" anywhere in the path.
    rand_no_cid = str_extract(file_name, "[0-9]{4}(?=\\.fst$)")
  )

# Keep only the files for the requested quarters and RAND_NO_CID values, and
# build the "<yyyymmdd>_<rand_no_cid>" key used to address each file below.
test_files <- file_info %>%
  filter(qtr %in% test_qtrs, rand_no_cid %in% RAND_NO_CID) %>%
  mutate(
    qtr_rand_no_cid = paste0(str_replace_all(qtr, "-", ""), "_", rand_no_cid)
  )

# Saved recipe object, read from the final-model output folder; it is passed
# to bake() further down in this script.
dummies_recipe <- paste0(
  "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "final_model_xgb_",
  TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC, "/",
  "dummy_recipe.rds"
) %>%
  readRDS()

# Saved model object from the same folder; it is passed to predict() further
# down in this script.
xgb_model <- paste0(
  "../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "final_model_xgb_",
  TRAINING_SUFFIX, "_", RAND_NO_CID_SMALLEST_LARGEST, "_", MODEL_METRIC, "/",
  "final_model.rds"
) %>%
  readRDS()

# Values of the `Variable` column of the lasso-selection CSV; used as column
# names when the test files are read.
v_lasso_vars <- paste0(
  "../../data/pipeline_outputs/", SPECIAL_SUFFIX,
  "/lasso_selected_vars_", TRAINING_SUFFIX, "_",
  RAND_NO_CID_SMALLEST_LARGEST, "/",
  "df_lasso_vars_and_dict.csv"
) %>%
  read_csv() %>%
  pull(Variable)

# Read the needed columns of a single quarter / rand_no_cid file.
#
# Args:
#   load_qtr_rand_no_cid: key matched against test_files$qtr_rand_no_cid
#     (built in this script as "<yyyymmdd>_<rand_no_cid>").
#
# Returns:
#   The rows of that file whose consumer_category is in CONSUMER_GROUP, with
#   columns cid, qtr, t_default, the v_lasso_vars columns, consumer_category,
#   plus rand_no_cid (the last four characters of the key).
#
# Reads the globals test_files, v_lasso_vars and CONSUMER_GROUP.
Load_One_Qtr_Rand_No_Cid <- function(load_qtr_rand_no_cid) {

  df <- test_files %>%
    filter(qtr_rand_no_cid == load_qtr_rand_no_cid)

  # read_fst() needs exactly one path; report a bad key explicitly instead of
  # letting it fail on an empty or multi-element path.
  if (nrow(df) != 1L) {
    stop(
      "Expected exactly one file for '", load_qtr_rand_no_cid,
      "' but found ", nrow(df), ".",
      call. = FALSE
    )
  }

  # `columns` is a plain character vector of names. all_of() is a tidyselect
  # helper for selecting functions only, so the vector is passed directly.
  read_fst(
    df$file_name,
    columns = c("cid", "qtr", "t_default", v_lasso_vars, "consumer_category")
  ) %>%
    filter(consumer_category %in% CONSUMER_GROUP) %>%
    mutate(
      rand_no_cid = str_sub(load_qtr_rand_no_cid, -4, -1)
    )
}

# Add year-based columns and turn the outcome into a factor.
#
# Args:
#   df_raw_data: data frame with at least the columns qtr and t_default.
#
# Returns:
#   df_raw_data with
#     year         - year(qtr)
#     year_outcome - 10 * year + t_default, computed from t_default as it
#                    arrives, i.e. before the factor conversion
#     t_default    - factor with levels 0 and 1
Add_Year_and_Default <- function(df_raw_data) {

  df_with_year <- mutate(df_raw_data, year = year(qtr))

  # Must run while t_default is still in its original form.
  df_with_outcome <- mutate(
    df_with_year,
    year_outcome = (10 * year) + t_default
  )

  mutate(
    df_with_outcome,
    t_default = factor(t_default, levels = c(0, 1))
  )
}

# Score one prepared data frame and write the fitted values to disk.
#
# Args:
#   df: data frame to score; it is baked with dummies_recipe before prediction.
#   load_qtr_rand_no_cid: key used in the output file name
#     ("fitted_<key>.fst").
#
# Returns:
#   The value of write_fst(), called for its side effect of writing the file.
#
# Reads the globals dummies_recipe, xgb_model, SPECIAL_SUFFIX and
# SUFFIX_FITTED_.
Save_Fitted_Values <- function(df, load_qtr_rand_no_cid) {

  df_test_baked <- bake(dummies_recipe, df)

  # Keep only the `.pred_1` column of the type = "prob" prediction.
  v_fitted <- predict(xgb_model, df_test_baked, type = "prob") %>%
    pull(".pred_1")

  # cid and qtr are taken from the baked data so that they stay row-aligned
  # with the predictions.
  df_fitted <- df_test_baked %>%
    transmute(
      cid,
      qtr,
      v_fitted
    )

  out_path <- paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "fitted_xgb_",
                     SUFFIX_FITTED_, "/", "fitted_", load_qtr_rand_no_cid, ".fst")

  # Create the output folder on first use so the write does not fail after the
  # prediction work is already done. Nothing happens if it already exists.
  dir.create(dirname(out_path), recursive = TRUE, showWarnings = FALSE)

  write_fst(df_fitted, out_path)
}

# Run the load -> prepare -> score-and-save steps for one file.
#
# Args:
#   load_qtr_rand_no_cid: key identifying the file; it is passed both to the
#     loader and to the saving step.
#
# Returns:
#   Whatever Save_Fitted_Values() returns.
Run_Saving_Pipeline <- function(load_qtr_rand_no_cid) {

  df_loaded <- Load_One_Qtr_Rand_No_Cid(load_qtr_rand_no_cid)
  df_prepared <- Add_Year_and_Default(df_loaded)

  Save_Fitted_Values(df_prepared, load_qtr_rand_no_cid)
}


# Disabled parallel setup, kept for reference:
# cl <- makeCluster(7)
# registerDoParallel(cl)

# Outer timer covers the whole run; the unlabelled inner timer reports each
# file separately.
tic(msg = "Total Loop Time")

# Disabled foreach form of the loop below, kept for reference:
# foreach(qtr_rand_no_cid_ = test_files$qtr_rand_no_cid, .packages = c("tidyverse", "lubridate","tidymodels","fs","fst","tictoc"),
#         .verbose = TRUE) %do% {

for (qtr_rand_no_cid_ in test_files$qtr_rand_no_cid) {
  tic()
  Run_Saving_Pipeline(qtr_rand_no_cid_)
  toc()
}

toc()

